{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3.网页排名\n",
    "## 3.1 网页点击率排名表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\ProgramData\\Anaconda3\\lib\\site-packages\\pymysql\\cursors.py:170: Warning: (1366, \"Incorrect string value: '\\\\xD6\\\\xD0\\\\xB9\\\\xFA\\\\xB1\\\\xEA...' for column 'VARIABLE_VALUE' at row 478\")\n",
      "  result = self._query(query)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>点击次数</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>网址</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/23.html</th>\n",
       "      <td>944</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/slb/2012111978933.html</th>\n",
       "      <td>643</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/43.html</th>\n",
       "      <td>516</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/browse_a1401.html</th>\n",
       "      <td>507</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/9.html</th>\n",
       "      <td>505</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hunyin/lhlawlhxy/20110707137693.html</th>\n",
       "      <td>484</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/exp/13655.html</th>\n",
       "      <td>379</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/slb/2012111978933_2.html</th>\n",
       "      <td>378</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/41.html</th>\n",
       "      <td>369</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/117.html</th>\n",
       "      <td>356</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/15.html</th>\n",
       "      <td>309</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/online/139.html</th>\n",
       "      <td>301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hunyin/lhlawlhxy/20110707137693_2.html</th>\n",
       "      <td>298</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/online/138.html</th>\n",
       "      <td>245</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/exp/8495.html</th>\n",
       "      <td>240</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/11.html</th>\n",
       "      <td>210</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/question_925675.html</th>\n",
       "      <td>197</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/minshi/fagui/2013051382463_4.html</th>\n",
       "      <td>175</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/21.html</th>\n",
       "      <td>169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/7.html</th>\n",
       "      <td>142</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    点击次数\n",
       "网址                                                      \n",
       "http://www.lawtime.cn/faguizt/23.html                944\n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...   643\n",
       "http://www.lawtime.cn/faguizt/43.html                516\n",
       "http://www.lawtime.cn/ask/browse_a1401.html          507\n",
       "http://www.lawtime.cn/faguizt/9.html                 505\n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...   484\n",
       "http://www.lawtime.cn/ask/exp/13655.html             379\n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...   378\n",
       "http://www.lawtime.cn/faguizt/41.html                369\n",
       "http://www.lawtime.cn/faguizt/117.html               356\n",
       "http://www.lawtime.cn/faguizt/15.html                309\n",
       "http://www.lawtime.cn/ask/online/139.html            301\n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...   298\n",
       "http://www.lawtime.cn/ask/online/138.html            245\n",
       "http://www.lawtime.cn/ask/exp/8495.html              240\n",
       "http://www.lawtime.cn/faguizt/11.html                210\n",
       "http://www.lawtime.cn/ask/question_925675.html       197\n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...   175\n",
       "http://www.lawtime.cn/faguizt/21.html                169\n",
       "http://www.lawtime.cn/faguizt/7.html                 142"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "from sqlalchemy import create_engine\n",
    "\n",
    "# The connection URL can be overridden with the WANGYE_DB_URL environment variable,\n",
    "# so credentials do not have to be edited into the notebook; the fallback is the\n",
    "# original local database URL.\n",
    "DB_URL = os.environ.get('WANGYE_DB_URL', 'mysql+pymysql://root:@localhost/wangye?charset=utf8')\n",
    "CHUNK_SIZE = 10000  # rows fetched per chunk\n",
    "\n",
    "engine = create_engine(DB_URL)\n",
    "# With chunksize set, read_sql returns a one-shot iterator of DataFrames, so this\n",
    "# line has to be re-run before the chunks can be consumed a second time.\n",
    "sql = pd.read_sql('all_gzdata', engine, chunksize=CHUNK_SIZE)\n",
    "\n",
    "def clickfreq(i): #自定义统计函数\n",
    "    j = i[['fullURL','fullURLId','realIP']][i['fullURL'].str.contains('\\.html')]\n",
    "    return j\n",
    "\n",
    "# Keep the .html rows of every chunk and stack them into a single frame.\n",
    "counts1 = pd.concat([clickfreq(chunk) for chunk in sql])\n",
    "\n",
    "# Number of clicks per URL, as a one-column table indexed by URL.\n",
    "counts1_ = counts1['fullURL'].value_counts().to_frame()\n",
    "counts1_.index.name = u'网址'\n",
    "counts1_.columns = [u'点击次数']\n",
    "\n",
    "# The 20 most-clicked pages.\n",
    "a = counts1_.sort_values(by=u'点击次数', ascending=False).head(20)\n",
    "a"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.2 点击次数大于50"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
      "  \n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>点击次数</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>网址</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/exp/13445.html</th>\n",
       "      <td>129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/exp/13653.html</th>\n",
       "      <td>57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/exp/13655.html</th>\n",
       "      <td>379</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/exp/17357.html</th>\n",
       "      <td>86</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/exp/8495.html</th>\n",
       "      <td>240</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/online/138.html</th>\n",
       "      <td>245</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/online/139.html</th>\n",
       "      <td>301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/11.html</th>\n",
       "      <td>210</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/117.html</th>\n",
       "      <td>356</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/131.html</th>\n",
       "      <td>121</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/15.html</th>\n",
       "      <td>309</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/168.html</th>\n",
       "      <td>77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/19.html</th>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/21.html</th>\n",
       "      <td>169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/23.html</th>\n",
       "      <td>944</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/25.html</th>\n",
       "      <td>71</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/27.html</th>\n",
       "      <td>53</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/291.html</th>\n",
       "      <td>79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/39.html</th>\n",
       "      <td>126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/41.html</th>\n",
       "      <td>369</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/43.html</th>\n",
       "      <td>516</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/79.html</th>\n",
       "      <td>127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/91.html</th>\n",
       "      <td>86</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/99.html</th>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hetong/ldht/201311152872128_2.html</th>\n",
       "      <td>95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hetong/ldht/201311152872128_3.html</th>\n",
       "      <td>62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hunyin/lhlawlhxy/20110707137693.html</th>\n",
       "      <td>484</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hunyin/lhlawlhxy/20110707137693_2.html</th>\n",
       "      <td>298</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/jiaotong/jtlawjtxgfg/201411273309942_3.html</th>\n",
       "      <td>65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/laodong/gongshixiujia/201412253312096.html</th>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/laodong/laodongbaoxian/bx/20131216141593.html</th>\n",
       "      <td>62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/minshi/fagui/2013051382463_4.html</th>\n",
       "      <td>175</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/minshi/fagui/2013051382463_5.html</th>\n",
       "      <td>53</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/grsds/201402172880722.html</th>\n",
       "      <td>80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/grsds/201410313308870.html</th>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/slb/2012111978933.html</th>\n",
       "      <td>643</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/slb/2012111978933_2.html</th>\n",
       "      <td>378</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/slb/201401232878784.html</th>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    点击次数\n",
       "网址                                                      \n",
       "http://www.lawtime.cn/ask/exp/13445.html             129\n",
       "http://www.lawtime.cn/ask/exp/13653.html              57\n",
       "http://www.lawtime.cn/ask/exp/13655.html             379\n",
       "http://www.lawtime.cn/ask/exp/17357.html              86\n",
       "http://www.lawtime.cn/ask/exp/8495.html              240\n",
       "http://www.lawtime.cn/ask/online/138.html            245\n",
       "http://www.lawtime.cn/ask/online/139.html            301\n",
       "http://www.lawtime.cn/faguizt/11.html                210\n",
       "http://www.lawtime.cn/faguizt/117.html               356\n",
       "http://www.lawtime.cn/faguizt/131.html               121\n",
       "http://www.lawtime.cn/faguizt/15.html                309\n",
       "http://www.lawtime.cn/faguizt/168.html                77\n",
       "http://www.lawtime.cn/faguizt/19.html                 76\n",
       "http://www.lawtime.cn/faguizt/21.html                169\n",
       "http://www.lawtime.cn/faguizt/23.html                944\n",
       "http://www.lawtime.cn/faguizt/25.html                 71\n",
       "http://www.lawtime.cn/faguizt/27.html                 53\n",
       "http://www.lawtime.cn/faguizt/291.html                79\n",
       "http://www.lawtime.cn/faguizt/39.html                126\n",
       "http://www.lawtime.cn/faguizt/41.html                369\n",
       "http://www.lawtime.cn/faguizt/43.html                516\n",
       "http://www.lawtime.cn/faguizt/79.html                127\n",
       "http://www.lawtime.cn/faguizt/91.html                 86\n",
       "http://www.lawtime.cn/faguizt/99.html                 81\n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...    95\n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...    62\n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...   484\n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...   298\n",
       "http://www.lawtime.cn/info/jiaotong/jtlawjtxgfg...    65\n",
       "http://www.lawtime.cn/info/laodong/gongshixiuji...    90\n",
       "http://www.lawtime.cn/info/laodong/laodongbaoxi...    62\n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...   175\n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...    53\n",
       "http://www.lawtime.cn/info/shuifa/grsds/2014021...    80\n",
       "http://www.lawtime.cn/info/shuifa/grsds/2014103...    91\n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...   643\n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...   378\n",
       "http://www.lawtime.cn/info/shuifa/slb/201401232...    56"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b = counts1_.reset_index()\n",
    "\n",
    "# Combine both conditions into a single mask built on b itself. Chaining two boolean\n",
    "# selections applies the second mask to an already-filtered frame and triggers\n",
    "# \"UserWarning: Boolean Series key will be reindexed to match DataFrame index\".\n",
    "over_50 = b[u'点击次数'] > 50\n",
    "# Raw string: \\d and \\. are regex escapes, not Python string escapes.\n",
    "numbered_page = b[u'网址'].str.contains(r'/\\d+?_*\\d+?\\.html')\n",
    "\n",
    "# Chained, non-inplace calls build a fresh frame instead of mutating a filtered slice.\n",
    "c = b.loc[over_50 & numbered_page].set_index(u'网址').sort_index()\n",
    "# savetosql(c, 'count355')  # optionally persist the result to the database\n",
    "c"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.3 翻页网页统计，对浏览网页翻页的情况进行统计"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>点击次数</th>\n",
       "      <th>websitemain</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>网址</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/exp/13445.html</th>\n",
       "      <td>129</td>\n",
       "      <td>www.lawtime.cn/ask/exp/13445</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/exp/13653.html</th>\n",
       "      <td>57</td>\n",
       "      <td>www.lawtime.cn/ask/exp/13653</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/exp/13655.html</th>\n",
       "      <td>379</td>\n",
       "      <td>www.lawtime.cn/ask/exp/13655</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/exp/17357.html</th>\n",
       "      <td>86</td>\n",
       "      <td>www.lawtime.cn/ask/exp/17357</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/exp/8495.html</th>\n",
       "      <td>240</td>\n",
       "      <td>www.lawtime.cn/ask/exp/8495</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/online/138.html</th>\n",
       "      <td>245</td>\n",
       "      <td>www.lawtime.cn/ask/online/138</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/ask/online/139.html</th>\n",
       "      <td>301</td>\n",
       "      <td>www.lawtime.cn/ask/online/139</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/11.html</th>\n",
       "      <td>210</td>\n",
       "      <td>www.lawtime.cn/faguizt/11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/117.html</th>\n",
       "      <td>356</td>\n",
       "      <td>www.lawtime.cn/faguizt/117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/131.html</th>\n",
       "      <td>121</td>\n",
       "      <td>www.lawtime.cn/faguizt/131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/15.html</th>\n",
       "      <td>309</td>\n",
       "      <td>www.lawtime.cn/faguizt/15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/168.html</th>\n",
       "      <td>77</td>\n",
       "      <td>www.lawtime.cn/faguizt/168</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/19.html</th>\n",
       "      <td>76</td>\n",
       "      <td>www.lawtime.cn/faguizt/19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/21.html</th>\n",
       "      <td>169</td>\n",
       "      <td>www.lawtime.cn/faguizt/21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/23.html</th>\n",
       "      <td>944</td>\n",
       "      <td>www.lawtime.cn/faguizt/23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/25.html</th>\n",
       "      <td>71</td>\n",
       "      <td>www.lawtime.cn/faguizt/25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/27.html</th>\n",
       "      <td>53</td>\n",
       "      <td>www.lawtime.cn/faguizt/27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/291.html</th>\n",
       "      <td>79</td>\n",
       "      <td>www.lawtime.cn/faguizt/291</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/39.html</th>\n",
       "      <td>126</td>\n",
       "      <td>www.lawtime.cn/faguizt/39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/41.html</th>\n",
       "      <td>369</td>\n",
       "      <td>www.lawtime.cn/faguizt/41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/43.html</th>\n",
       "      <td>516</td>\n",
       "      <td>www.lawtime.cn/faguizt/43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/79.html</th>\n",
       "      <td>127</td>\n",
       "      <td>www.lawtime.cn/faguizt/79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/91.html</th>\n",
       "      <td>86</td>\n",
       "      <td>www.lawtime.cn/faguizt/91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/faguizt/99.html</th>\n",
       "      <td>81</td>\n",
       "      <td>www.lawtime.cn/faguizt/99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hetong/ldht/201311152872128_2.html</th>\n",
       "      <td>95</td>\n",
       "      <td>www.lawtime.cn/info/hetong/ldht/201311152872128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hetong/ldht/201311152872128_3.html</th>\n",
       "      <td>62</td>\n",
       "      <td>www.lawtime.cn/info/hetong/ldht/201311152872128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hunyin/lhlawlhxy/20110707137693.html</th>\n",
       "      <td>484</td>\n",
       "      <td>www.lawtime.cn/info/hunyin/lhlawlhxy/201107071...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hunyin/lhlawlhxy/20110707137693_2.html</th>\n",
       "      <td>298</td>\n",
       "      <td>www.lawtime.cn/info/hunyin/lhlawlhxy/201107071...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/jiaotong/jtlawjtxgfg/201411273309942_3.html</th>\n",
       "      <td>65</td>\n",
       "      <td>www.lawtime.cn/info/jiaotong/jtlawjtxgfg/20141...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/laodong/gongshixiujia/201412253312096.html</th>\n",
       "      <td>90</td>\n",
       "      <td>www.lawtime.cn/info/laodong/gongshixiujia/2014...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/laodong/laodongbaoxian/bx/20131216141593.html</th>\n",
       "      <td>62</td>\n",
       "      <td>www.lawtime.cn/info/laodong/laodongbaoxian/bx/...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/minshi/fagui/2013051382463_4.html</th>\n",
       "      <td>175</td>\n",
       "      <td>www.lawtime.cn/info/minshi/fagui/2013051382463</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/minshi/fagui/2013051382463_5.html</th>\n",
       "      <td>53</td>\n",
       "      <td>www.lawtime.cn/info/minshi/fagui/2013051382463</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/grsds/201402172880722.html</th>\n",
       "      <td>80</td>\n",
       "      <td>www.lawtime.cn/info/shuifa/grsds/201402172880722</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/grsds/201410313308870.html</th>\n",
       "      <td>91</td>\n",
       "      <td>www.lawtime.cn/info/shuifa/grsds/201410313308870</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/slb/2012111978933.html</th>\n",
       "      <td>643</td>\n",
       "      <td>www.lawtime.cn/info/shuifa/slb/2012111978933</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/slb/2012111978933_2.html</th>\n",
       "      <td>378</td>\n",
       "      <td>www.lawtime.cn/info/shuifa/slb/2012111978933</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/slb/201401232878784.html</th>\n",
       "      <td>56</td>\n",
       "      <td>www.lawtime.cn/info/shuifa/slb/201401232878784</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    点击次数  \\\n",
       "网址                                                         \n",
       "http://www.lawtime.cn/ask/exp/13445.html             129   \n",
       "http://www.lawtime.cn/ask/exp/13653.html              57   \n",
       "http://www.lawtime.cn/ask/exp/13655.html             379   \n",
       "http://www.lawtime.cn/ask/exp/17357.html              86   \n",
       "http://www.lawtime.cn/ask/exp/8495.html              240   \n",
       "http://www.lawtime.cn/ask/online/138.html            245   \n",
       "http://www.lawtime.cn/ask/online/139.html            301   \n",
       "http://www.lawtime.cn/faguizt/11.html                210   \n",
       "http://www.lawtime.cn/faguizt/117.html               356   \n",
       "http://www.lawtime.cn/faguizt/131.html               121   \n",
       "http://www.lawtime.cn/faguizt/15.html                309   \n",
       "http://www.lawtime.cn/faguizt/168.html                77   \n",
       "http://www.lawtime.cn/faguizt/19.html                 76   \n",
       "http://www.lawtime.cn/faguizt/21.html                169   \n",
       "http://www.lawtime.cn/faguizt/23.html                944   \n",
       "http://www.lawtime.cn/faguizt/25.html                 71   \n",
       "http://www.lawtime.cn/faguizt/27.html                 53   \n",
       "http://www.lawtime.cn/faguizt/291.html                79   \n",
       "http://www.lawtime.cn/faguizt/39.html                126   \n",
       "http://www.lawtime.cn/faguizt/41.html                369   \n",
       "http://www.lawtime.cn/faguizt/43.html                516   \n",
       "http://www.lawtime.cn/faguizt/79.html                127   \n",
       "http://www.lawtime.cn/faguizt/91.html                 86   \n",
       "http://www.lawtime.cn/faguizt/99.html                 81   \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...    95   \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...    62   \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...   484   \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...   298   \n",
       "http://www.lawtime.cn/info/jiaotong/jtlawjtxgfg...    65   \n",
       "http://www.lawtime.cn/info/laodong/gongshixiuji...    90   \n",
       "http://www.lawtime.cn/info/laodong/laodongbaoxi...    62   \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...   175   \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...    53   \n",
       "http://www.lawtime.cn/info/shuifa/grsds/2014021...    80   \n",
       "http://www.lawtime.cn/info/shuifa/grsds/2014103...    91   \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...   643   \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...   378   \n",
       "http://www.lawtime.cn/info/shuifa/slb/201401232...    56   \n",
       "\n",
       "                                                                                          websitemain  \n",
       "网址                                                                                                     \n",
       "http://www.lawtime.cn/ask/exp/13445.html                                 www.lawtime.cn/ask/exp/13445  \n",
       "http://www.lawtime.cn/ask/exp/13653.html                                 www.lawtime.cn/ask/exp/13653  \n",
       "http://www.lawtime.cn/ask/exp/13655.html                                 www.lawtime.cn/ask/exp/13655  \n",
       "http://www.lawtime.cn/ask/exp/17357.html                                 www.lawtime.cn/ask/exp/17357  \n",
       "http://www.lawtime.cn/ask/exp/8495.html                                   www.lawtime.cn/ask/exp/8495  \n",
       "http://www.lawtime.cn/ask/online/138.html                               www.lawtime.cn/ask/online/138  \n",
       "http://www.lawtime.cn/ask/online/139.html                               www.lawtime.cn/ask/online/139  \n",
       "http://www.lawtime.cn/faguizt/11.html                                       www.lawtime.cn/faguizt/11  \n",
       "http://www.lawtime.cn/faguizt/117.html                                     www.lawtime.cn/faguizt/117  \n",
       "http://www.lawtime.cn/faguizt/131.html                                     www.lawtime.cn/faguizt/131  \n",
       "http://www.lawtime.cn/faguizt/15.html                                       www.lawtime.cn/faguizt/15  \n",
       "http://www.lawtime.cn/faguizt/168.html                                     www.lawtime.cn/faguizt/168  \n",
       "http://www.lawtime.cn/faguizt/19.html                                       www.lawtime.cn/faguizt/19  \n",
       "http://www.lawtime.cn/faguizt/21.html                                       www.lawtime.cn/faguizt/21  \n",
       "http://www.lawtime.cn/faguizt/23.html                                       www.lawtime.cn/faguizt/23  \n",
       "http://www.lawtime.cn/faguizt/25.html                                       www.lawtime.cn/faguizt/25  \n",
       "http://www.lawtime.cn/faguizt/27.html                                       www.lawtime.cn/faguizt/27  \n",
       "http://www.lawtime.cn/faguizt/291.html                                     www.lawtime.cn/faguizt/291  \n",
       "http://www.lawtime.cn/faguizt/39.html                                       www.lawtime.cn/faguizt/39  \n",
       "http://www.lawtime.cn/faguizt/41.html                                       www.lawtime.cn/faguizt/41  \n",
       "http://www.lawtime.cn/faguizt/43.html                                       www.lawtime.cn/faguizt/43  \n",
       "http://www.lawtime.cn/faguizt/79.html                                       www.lawtime.cn/faguizt/79  \n",
       "http://www.lawtime.cn/faguizt/91.html                                       www.lawtime.cn/faguizt/91  \n",
       "http://www.lawtime.cn/faguizt/99.html                                       www.lawtime.cn/faguizt/99  \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...    www.lawtime.cn/info/hetong/ldht/201311152872128  \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...    www.lawtime.cn/info/hetong/ldht/201311152872128  \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...  www.lawtime.cn/info/hunyin/lhlawlhxy/201107071...  \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...  www.lawtime.cn/info/hunyin/lhlawlhxy/201107071...  \n",
       "http://www.lawtime.cn/info/jiaotong/jtlawjtxgfg...  www.lawtime.cn/info/jiaotong/jtlawjtxgfg/20141...  \n",
       "http://www.lawtime.cn/info/laodong/gongshixiuji...  www.lawtime.cn/info/laodong/gongshixiujia/2014...  \n",
       "http://www.lawtime.cn/info/laodong/laodongbaoxi...  www.lawtime.cn/info/laodong/laodongbaoxian/bx/...  \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...     www.lawtime.cn/info/minshi/fagui/2013051382463  \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...     www.lawtime.cn/info/minshi/fagui/2013051382463  \n",
       "http://www.lawtime.cn/info/shuifa/grsds/2014021...   www.lawtime.cn/info/shuifa/grsds/201402172880722  \n",
       "http://www.lawtime.cn/info/shuifa/grsds/2014103...   www.lawtime.cn/info/shuifa/grsds/201410313308870  \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...       www.lawtime.cn/info/shuifa/slb/2012111978933  \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...       www.lawtime.cn/info/shuifa/slb/2012111978933  \n",
       "http://www.lawtime.cn/info/shuifa/slb/201401232...     www.lawtime.cn/info/shuifa/slb/201401232878784  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Extract the main part of each URL, i.e. the text between 'http://' and '.html'\n",
    "# with any trailing pagination suffix ('_<page>' or '_<a>_<b>') removed.\n",
    "import re\n",
    "import numpy as np\n",
    "\n",
    "# Raw string literals keep \\d, \\w and \\. intact for the regex engine, and the dot in\n",
    "# front of 'html' is escaped in every alternative so that it only matches a literal '.'.\n",
    "pattern = re.compile(\n",
    "    r'http://(.*\\d+?)_\\w+_\\w+\\.html$'\n",
    "    r'|http://(.*\\d+?)_\\w+\\.html$'\n",
    "    r'|http://(.*\\w+?)\\.html$',\n",
    "    re.S,\n",
    ")\n",
    "\n",
    "\n",
    "def extract_websitemain(url):\n",
    "    \"\"\"Return the main part of ``url`` (no scheme, no page suffix, no '.html'), or NaN if it does not match.\"\"\"\n",
    "    match = pattern.search(url)\n",
    "    if match is None:\n",
    "        return np.nan\n",
    "    # Only one alternative can match, so only one capture group is non-empty.\n",
    "    return next(group for group in match.groups() if group)\n",
    "\n",
    "\n",
    "# Assign the whole column at once: this does not depend on the column position and\n",
    "# never writes strings into a float (NaN) column.\n",
    "c['websitemain'] = [extract_websitemain(url) for url in c.index]\n",
    "c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>websitemain</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/info/hetong/ldht/201311152872128</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/info/minshi/fagui/2013051382463</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/info/shuifa/slb/2012111978933</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/info/hunyin/lhlawlhxy/20110707137693</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/ask/exp/17357</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/27</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/info/laodong/gongshixiujia/201412253312096</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/131</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/168</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/25</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/19</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/info/shuifa/grsds/201402172880722</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/79</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/23</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/91</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/43</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/ask/exp/13655</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/ask/exp/13653</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/291</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/ask/exp/13445</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/117</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/99</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/41</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/info/laodong/laodongbaoxian/bx/20131216141593</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/11</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/21</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/ask/online/138</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/info/shuifa/grsds/201410313308870</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/info/jiaotong/jtlawjtxgfg/201411273309942</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/15</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/ask/online/139</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/info/shuifa/slb/201401232878784</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/ask/exp/8495</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>www.lawtime.cn/faguizt/39</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    websitemain\n",
       "www.lawtime.cn/info/hetong/ldht/201311152872128               2\n",
       "www.lawtime.cn/info/minshi/fagui/2013051382463                2\n",
       "www.lawtime.cn/info/shuifa/slb/2012111978933                  2\n",
       "www.lawtime.cn/info/hunyin/lhlawlhxy/2011070713...            2\n",
       "www.lawtime.cn/ask/exp/17357                                  1\n",
       "www.lawtime.cn/faguizt/27                                     1\n",
       "www.lawtime.cn/info/laodong/gongshixiujia/20141...            1\n",
       "www.lawtime.cn/faguizt/131                                    1\n",
       "www.lawtime.cn/faguizt/168                                    1\n",
       "www.lawtime.cn/faguizt/25                                     1\n",
       "www.lawtime.cn/faguizt/19                                     1\n",
       "www.lawtime.cn/info/shuifa/grsds/201402172880722              1\n",
       "www.lawtime.cn/faguizt/79                                     1\n",
       "www.lawtime.cn/faguizt/23                                     1\n",
       "www.lawtime.cn/faguizt/91                                     1\n",
       "www.lawtime.cn/faguizt/43                                     1\n",
       "www.lawtime.cn/ask/exp/13655                                  1\n",
       "www.lawtime.cn/ask/exp/13653                                  1\n",
       "www.lawtime.cn/faguizt/291                                    1\n",
       "www.lawtime.cn/ask/exp/13445                                  1\n",
       "www.lawtime.cn/faguizt/117                                    1\n",
       "www.lawtime.cn/faguizt/99                                     1\n",
       "www.lawtime.cn/faguizt/41                                     1\n",
       "www.lawtime.cn/info/laodong/laodongbaoxian/bx/2...            1\n",
       "www.lawtime.cn/faguizt/11                                     1\n",
       "www.lawtime.cn/faguizt/21                                     1\n",
       "www.lawtime.cn/ask/online/138                                 1\n",
       "www.lawtime.cn/info/shuifa/grsds/201410313308870              1\n",
       "www.lawtime.cn/info/jiaotong/jtlawjtxgfg/201411...            1\n",
       "www.lawtime.cn/faguizt/15                                     1\n",
       "www.lawtime.cn/ask/online/139                                 1\n",
       "www.lawtime.cn/info/shuifa/slb/201401232878784                1\n",
       "www.lawtime.cn/ask/exp/8495                                   1\n",
       "www.lawtime.cn/faguizt/39                                     1"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count how many pages share each main URL.\n",
    "# The column is named explicitly: pandas >= 2.0 names the value_counts() result 'count',\n",
    "# while the next cell selects the column as d['websitemain'].\n",
    "d = c['websitemain'].value_counts().to_frame(name='websitemain')\n",
    "d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:6: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>点击次数</th>\n",
       "      <th>websitemain</th>\n",
       "      <th>Times</th>\n",
       "      <th>num</th>\n",
       "      <th>per</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>网址</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hetong/ldht/201311152872128_2.html</th>\n",
       "      <td>95</td>\n",
       "      <td>www.lawtime.cn/info/hetong/ldht/201311152872128</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hetong/ldht/201311152872128_3.html</th>\n",
       "      <td>62</td>\n",
       "      <td>www.lawtime.cn/info/hetong/ldht/201311152872128</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hunyin/lhlawlhxy/20110707137693.html</th>\n",
       "      <td>484</td>\n",
       "      <td>www.lawtime.cn/info/hunyin/lhlawlhxy/201107071...</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hunyin/lhlawlhxy/20110707137693_2.html</th>\n",
       "      <td>298</td>\n",
       "      <td>www.lawtime.cn/info/hunyin/lhlawlhxy/201107071...</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/minshi/fagui/2013051382463_4.html</th>\n",
       "      <td>175</td>\n",
       "      <td>www.lawtime.cn/info/minshi/fagui/2013051382463</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/minshi/fagui/2013051382463_5.html</th>\n",
       "      <td>53</td>\n",
       "      <td>www.lawtime.cn/info/minshi/fagui/2013051382463</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/slb/2012111978933.html</th>\n",
       "      <td>643</td>\n",
       "      <td>www.lawtime.cn/info/shuifa/slb/2012111978933</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/slb/2012111978933_2.html</th>\n",
       "      <td>378</td>\n",
       "      <td>www.lawtime.cn/info/shuifa/slb/2012111978933</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    点击次数  \\\n",
       "网址                                                         \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...    95   \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...    62   \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...   484   \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...   298   \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...   175   \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...    53   \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...   643   \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...   378   \n",
       "\n",
       "                                                                                          websitemain  \\\n",
       "网址                                                                                                      \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...    www.lawtime.cn/info/hetong/ldht/201311152872128   \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...    www.lawtime.cn/info/hetong/ldht/201311152872128   \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...  www.lawtime.cn/info/hunyin/lhlawlhxy/201107071...   \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...  www.lawtime.cn/info/hunyin/lhlawlhxy/201107071...   \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...     www.lawtime.cn/info/minshi/fagui/2013051382463   \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...     www.lawtime.cn/info/minshi/fagui/2013051382463   \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...       www.lawtime.cn/info/shuifa/slb/2012111978933   \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...       www.lawtime.cn/info/shuifa/slb/2012111978933   \n",
       "\n",
       "                                                    Times  num  per  \n",
       "网址                                                                   \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...      2    1  NaN  \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...      2    1  NaN  \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...      2    4  NaN  \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...      2    4  NaN  \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...      2    2  NaN  \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...      2    2  NaN  \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...      2    3  NaN  \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...      2    3  NaN  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the main URLs that occur at least twice, i.e. pages that have pagination.\n",
    "# .copy() makes e an independent frame, so adding the num column below does not\n",
    "# write into a slice of d (the source of the SettingWithCopyWarning).\n",
    "e = d[d['websitemain'] >= 2].copy()\n",
    "e.columns = ['Times']  # number of pages (main page and sub-pages) found for the main URL\n",
    "e.index.name = 'websitemain'  # the main URL\n",
    "e['num'] = np.arange(1, len(e) + 1)  # running group id, one per main URL\n",
    "\n",
    "# Attach Times and num to every page that belongs to one of the paginated main URLs.\n",
    "f = pd.merge(c, e, left_on='websitemain', right_index=True, how='right').sort_index()\n",
    "# Placeholder for the click ratio; getper reads the first column and writes the last one,\n",
    "# so per has to stay the last column.\n",
    "f['per'] = np.nan\n",
    "f  # rows with the same num share the same main URL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:543: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  self.obj[item] = s\n"
     ]
    }
   ],
   "source": [
    "# Ratio between the click count of each sub-page and that of the page before it.\n",
    "# Caveat: pages are ordered by sorting the URL strings, which is lexicographic\n",
    "# ('_10' sorts before '_2'), so main URLs with 10 or more pages are ordered wrongly.\n",
    "def getper(x):\n",
    "    \"\"\"Return a copy of ``x`` sorted by index with the click ratios in its last column.\n",
    "\n",
    "    ``x`` holds the rows of one main URL. The click count is read from the first\n",
    "    column; row i (i >= 1) of the last column receives clicks[i] / clicks[i - 1].\n",
    "    The first row has no previous page and keeps its existing value.\n",
    "    \"\"\"\n",
    "    x = x.sort_index()  # returns a new frame, so the caller's filtered slice is not modified\n",
    "    if len(x) > 1:\n",
    "        clicks = x.iloc[:, 0].values\n",
    "        x.iloc[1:, -1] = clicks[1:] / clicks[:-1]\n",
    "    return x\n",
    "\n",
    "\n",
    "# One frame per group (rows sharing the same num, visited in ascending num order),\n",
    "# concatenated once instead of growing result inside the loop.\n",
    "groups = [getper(group) for _, group in f.groupby('num')]\n",
    "result = pd.concat(groups) if groups else f.iloc[:0].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2    8\n",
       "Name: Times, dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Author's note (translated): according to these counts only one main URL occurs 10 or more\n",
    "# times; the method above goes wrong for such data, so it is removed from the result and the\n",
    "# remaining data is examined.\n",
    "# NOTE(review): the stored output of this cell lists only Times == 2 - confirm the note\n",
    "# still matches the data.\n",
    "f['Times'].value_counts() # number of rows of f for each Times value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>点击次数</th>\n",
       "      <th>websitemain</th>\n",
       "      <th>Times</th>\n",
       "      <th>num</th>\n",
       "      <th>per</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>网址</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hetong/ldht/201311152872128_2.html</th>\n",
       "      <td>95</td>\n",
       "      <td>www.lawtime.cn/info/hetong/ldht/201311152872128</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hetong/ldht/201311152872128_3.html</th>\n",
       "      <td>62</td>\n",
       "      <td>www.lawtime.cn/info/hetong/ldht/201311152872128</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0.652632</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/minshi/fagui/2013051382463_4.html</th>\n",
       "      <td>175</td>\n",
       "      <td>www.lawtime.cn/info/minshi/fagui/2013051382463</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/minshi/fagui/2013051382463_5.html</th>\n",
       "      <td>53</td>\n",
       "      <td>www.lawtime.cn/info/minshi/fagui/2013051382463</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0.302857</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/slb/2012111978933.html</th>\n",
       "      <td>643</td>\n",
       "      <td>www.lawtime.cn/info/shuifa/slb/2012111978933</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/shuifa/slb/2012111978933_2.html</th>\n",
       "      <td>378</td>\n",
       "      <td>www.lawtime.cn/info/shuifa/slb/2012111978933</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0.587869</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hunyin/lhlawlhxy/20110707137693.html</th>\n",
       "      <td>484</td>\n",
       "      <td>www.lawtime.cn/info/hunyin/lhlawlhxy/201107071...</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://www.lawtime.cn/info/hunyin/lhlawlhxy/20110707137693_2.html</th>\n",
       "      <td>298</td>\n",
       "      <td>www.lawtime.cn/info/hunyin/lhlawlhxy/201107071...</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0.615702</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    点击次数  \\\n",
       "网址                                                         \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...    95   \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...    62   \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...   175   \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...    53   \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...   643   \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...   378   \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...   484   \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...   298   \n",
       "\n",
       "                                                                                          websitemain  \\\n",
       "网址                                                                                                      \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...    www.lawtime.cn/info/hetong/ldht/201311152872128   \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...    www.lawtime.cn/info/hetong/ldht/201311152872128   \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...     www.lawtime.cn/info/minshi/fagui/2013051382463   \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...     www.lawtime.cn/info/minshi/fagui/2013051382463   \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...       www.lawtime.cn/info/shuifa/slb/2012111978933   \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...       www.lawtime.cn/info/shuifa/slb/2012111978933   \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...  www.lawtime.cn/info/hunyin/lhlawlhxy/201107071...   \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...  www.lawtime.cn/info/hunyin/lhlawlhxy/201107071...   \n",
       "\n",
       "                                                    Times  num       per  \n",
       "网址                                                                        \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...      2    1       NaN  \n",
       "http://www.lawtime.cn/info/hetong/ldht/20131115...      2    1  0.652632  \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...      2    2       NaN  \n",
       "http://www.lawtime.cn/info/minshi/fagui/2013051...      2    2  0.302857  \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...      2    3       NaN  \n",
       "http://www.lawtime.cn/info/shuifa/slb/201211197...      2    3  0.587869  \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...      2    4       NaN  \n",
       "http://www.lawtime.cn/info/hunyin/lhlawlhxy/201...      2    4  0.615702  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Keep only the main URLs with fewer than 10 pages.\n",
    "flipPageResult = result[result['Times'] < 10]\n",
    "\n",
    "# flipPageResult: ratio of each sub-page's click count to that of the previous page.\n",
    "# Author's note (translated): saved tables are named '1_3_k<purpose of the table>' and this\n",
    "# is the first table produced in this section; the file name actually used is\n",
    "# '3_flipPageResult.xlsx'.\n",
    "# to_excel does not create missing parent directories, so make sure ./tmp exists first.\n",
    "os.makedirs('./tmp', exist_ok=True)\n",
    "flipPageResult.to_excel('./tmp/3_flipPageResult.xlsx')\n",
    "flipPageResult"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
